I had a video of a presentation that had artifacts from over-compression. As I wanted to see what the slides said, I thought it might be possible to average out the artifacts. The following is an exploration of using an IPython notebook to do so.
# Embed the source talk in the notebook so it documents exactly which
# video is being processed below.
from IPython.display import YouTubeVideo
YouTubeVideo('dJlHBS7sP_c',width=640, height=480)
Taking advantage of an optimized avconv for saving frames as PNGs
# IPython magic: change into the directory that will hold the extracted frames.
cd frames
# Shell out to avconv: decode vid.mp4 into sequentially numbered PNG frames
# (Out00001.png, Out00002.png, ...); %time reports how long extraction takes.
%time !avconv -i vid.mp4 -f image2 Out%05d.png
import os, os.path
import cv2
import pandas as pd
import numpy as np
# Python 2 lazy-iterator helpers (imap/ifilter were removed in Python 3).
from itertools import imap, islice, ifilter
# Pull numpy and matplotlib names (plt, etc.) into the notebook namespace.
%pylab inline
# Set the default image size to the correct aspect ratio
plt.rcParams['figure.figsize'] = (15,12)
window(seq, window_width=1): Given an iterable seq, return an iterator that yields a sliding window_width-wide tuple of consecutive items from seq.
def window(seq, window_width=1):
    """
    Yield a sliding window of width window_width over seq:
    (s[0], ..., s[window_width-1]), (s[1], ..., s[window_width]), ...
    Yields nothing when seq has fewer than window_width items.
    """
    source = iter(seq)
    # Prime the first window directly from the iterator.
    buf = tuple(islice(source, window_width))
    if len(buf) < window_width:
        # Too few items for even one full window.
        return
    yield buf
    for item in source:
        # Slide by one: drop the oldest item, append the newest.
        buf = buf[1:] + (item,)
        yield buf
t_imread(t) : Returns a function that will return an image at the path passed to it as type t.
def t_imread(t):
    """
    Build and return a loader function: given a path, it reads the
    image with OpenCV and casts the pixel data to numpy type t.
    """
    def _load(iname):
        return cv2.imread(iname).astype(t)
    return _load
sqrdiffp(pair) : Returns the sum of the squared differences of all the channels on the pair of images in the tuple pair.
def sqrdiffp(pair):
    """
    Return the sum, over every pixel and channel, of the squared
    difference between the two images in the 2-tuple pair.
    """
    first, second = pair
    delta = first - second
    return (delta ** 2).sum()
fname(num, prefix="Out", postfix=".png", maxlen=5) : Constructs and returns a sequenced image file name, padded to maxlen.
def fname(num, prefix="Out", postfix=".png", maxlen=5):
"""
Constructs and returns a sequenced image file name, padded to maxlen
"""
return prefix + str(num).zfill(maxlen) + postfix
# Frame range produced by avconv (1-based, inclusive).
beg = 1
end = 16666
# Lazy pipeline: file names -> images (int16 so frame subtraction
# cannot wrap around the way uint8 would) -> consecutive pairs ->
# per-pair sum of squared differences.
inames = imap(fname, xrange(beg, end+1))
iimage = imap(t_imread(np.int16), inames)
ipairimage = window(iimage, 2)
isqrdiff = imap(sqrdiffp, ipairimage)
# Materialize the whole pipeline (reads every frame off disk);
# %time reports how long that takes.
%time sqrdiff = list(isqrdiff)
# Wrap the per-pair differences in a Series for plotting and statistics.
p = pd.Series(sqrdiff)
# Log scale: scene cuts are orders of magnitude above in-scene noise.
ax = p.plot(figsize=(14,8), logy=True)
ax.set_xlabel("Pair of Frame Numbers start")
_ = ax.set_ylabel("Sum of Squared Difference")
p.describe()
p.hist(bins=121, figsize=(14,4))
#display the square of differences above and below the cutoff
# The 99.4th percentile was chosen empirically to separate scene
# changes from within-scene compression noise.
cutoff = p.quantile(0.994)
fig, axes = plt.subplots(sharex=True, nrows=2, ncols=1,
                         figsize=(14, 4))
p.plot(ylim=[cutoff, 0.7e8], title="Above Cutoff", ax=axes[0])
p.plot(ylim=[0, cutoff], title="Below Cutoff", ax=axes[1])
axes[1].set_xlabel("Pair of Frame Numbers start")
# One shared y-axis label centered across both subplots.
fig.text(0.09, 0.5, 'Sum of Squared Difference',
         ha='center', va='center', rotation='vertical')
Add the end scenes to the list of scenes with high change
# Create a list of entries whose sum of square difference is above the cutoff
# add the first frame and last frame as scene boundaries
o = np.concatenate(([1], p[p>cutoff].index.values, [end]))
print(o)
I found there was some scene zooming; these frames are removed by keeping a noisy scene boundary only when it occurs at least 90 frames after the previous one.
# Keep only boundary pairs that are more than calmthresh frames apart;
# this drops zoom animations that trigger many closely spaced boundaries.
calmthresh = 90
calmderiv = list(ifilter(lambda x: x[1]-x[0] > calmthresh, window(o,2)))
print(calmderiv)
Slides had transitions, so trim 5 frames off of each scene border
# Trim the slide-transition frames off both ends of every calm range.
border = 5
calm = map(lambda x: (x[0]+border,x[1]-border), calmderiv)
# Python 2 print statement.
print calm
def scene_ave(scene_ranges, ysize = 480, xsize = 600, channels = 3):
    """
    Return an image cube with one layer per (start, end) tuple in
    scene_ranges.  Each pixel of a layer is the average of the
    corresponding pixels across every frame in that range, and each
    layer is upscaled 2x for display.
    """
    nscenes = len(scene_ranges)
    # Build a loader that reads frames as uint32 so sums cannot overflow.
    reader = t_imread(np.uint32)
    # One doubled-size averaged image per scene.
    scube = np.ndarray((nscenes, ysize*2, xsize*2, channels), dtype='uint8')
    for layer, (start, stop) in enumerate(scene_ranges):
        # Stack every frame of this scene into a temporary cube.
        frames = np.ndarray((stop-start, ysize, xsize, channels), dtype='uint8')
        for fnum in xrange(start, stop):
            frames[fnum-start,:,:,:] = reader(fname(fnum))
        # Per-pixel mean along the frame axis, rounded back to bytes.
        averaged = frames.mean(axis=0).round().astype(np.uint8)
        # Resize 2x for easier viewing.
        scube[layer] = cv2.resize(averaged, (2*xsize, 2*ysize))
    return scube
# Average every calm scene (re-reads all of their frames); %time shows cost.
%time scube = scene_ave(calm)
def showbgr(img, title=None):
    """
    Show an OpenCV BGR image with matplotlib (which expects RGB),
    optionally titling the plot first.  Returns the AxesImage.
    """
    if title:
        plt.title(title)
    # matplotlib wants RGB channel order, OpenCV stores BGR.
    rgb = cv2.cvtColor(img, cv2.cv.CV_BGR2RGB)
    return plt.imshow(rgb)
def unsharpMask(img, alpha=3.5, beta=-2.5, gamma=.5,
                ksize=(11,11), sigmaX=0, sigmaY=0):
    """
    Return the image sharpened with an unsharp mask: amplify the
    original by alpha, add the Gaussian-blurred copy weighted by the
    (negative) beta, plus the scalar gamma.
    XXX: Need to document the book that taught me this.
    """
    # Gausian Blur the image.
    # BUG FIX: cv2.GaussianBlur's fourth *positional* parameter is dst,
    # not sigmaY, so the original call handed sigmaY to dst by mistake;
    # sigmaY must be passed by keyword.
    imgblur = cv2.GaussianBlur(img, ksize, sigmaX, sigmaY=sigmaY)
    # subtract a multiple of the blurred image from an amplified original
    return cv2.addWeighted(img, alpha, imgblur, beta, gamma)
def autoLevel(img):
    """
    Return the image linearly stretched so that the darkest pixel (over
    all channels) maps to 0 and the brightest maps to 255.
    """
    i = img.astype(np.double)
    lo = i.min()
    hi = i.max()
    if hi == lo:
        # Flat image: no contrast to stretch; map everything to 0
        # rather than dividing by zero below.
        return np.zeros_like(img, dtype=np.uint8)
    # BUG FIX: the original divided by i.max() alone, so any image whose
    # minimum is above 0 never reached full brightness, contradicting
    # the documented contract.  Normalize by the full dynamic range.
    return ((i - lo) * 255 / (hi - lo)).astype(np.uint8)
# Sharpen, auto-level, and display each averaged scene image.
for ind, img in enumerate(scube):
    plt.figure()
    ax = showbgr(autoLevel(unsharpMask(img)), "scube[" + str(ind) + "]")
    # NOTE(review): Artist.get_axes() is deprecated in newer matplotlib
    # (use .axes); era-appropriate for this notebook's OpenCV 2.x stack.
    ax.get_axes().axis('off')